---
title: "SF3B1 splicing analysis"
subtitle: "Splicing maps"
date: "`r format(Sys.time(), '%B %e, %Y')`"
author:
- name: "Dr. Mirko Brueggemann"
email: mirko.brueggemann@bmls.de
affiliations:
- name: Buchman Institute for Molecular Life Sciences
format:
html:
theme: sandstone
code-fold: TRUE
code-overflow: scroll
code-summary: "Show code"
code-tools: TRUE
toc: TRUE
toc-depth: 3
toc-location: left
number-sections: TRUE
self-contained: TRUE
fontsize: 11pt
crossref:
fig-title: '**Figure**'
fig-labels: arabic
title-delim: "**.**"
code-block-bg: "#EEEEEE"
editor:
markdown:
wrap: 120
---
# Analysis description
This report holds all analysis and plots used to create main figure 5 A-C with and all related supplementary figures.
## Load libraries
```{r}
#| label: libraries
#| message: false
library (knitr)
library (grid)
library (gridGraphics)
library (gridExtra)
library (scales)
library (reshape2)
library (ggplot2)
library (rtracklayer)
library (GenomicFeatures)
library (GenomicAlignments)
library (viridis)
library (tibble)
library (dplyr)
library (tidyr)
library (ComplexHeatmap)
library (kableExtra)
library (GenomicRanges)
library (BindingSiteFinder)
library (readr)
library (circlize)
library (patchwork)
library (ggsci)
library (ggbeeswarm)
library (zoo)
```
```{r}
#| label: load additional scripts
#| message: false
source ("../styles.R" )
source ("../helper.R" )
```
# Significant splicing events
```{r, formatRangesSplicing}
#| label: make granges from event table
#| message: false
# import table and format
table3As <- read_csv ("~/Projects/sf3b1/03_Splicing/results202212/golong_all_2022_12_diff_betabinomial_less_strict_3AS_all.csv" )
standChrs = c (paste0 ("chr" , seq (1 : 22 )), "chrX" , "chrY" )
table3As = table3As %>%
as.data.frame () %>%
select (gene, gene_id, chrom, strand, start, alternative, end, padj, all_mut_PSI, all_wt_PSI, total_PSI) %>%
rownames_to_column () %>%
subset (chrom %in% standChrs) %>%
mutate (end = end + 1 ) %>%
mutate (alternative = ifelse (strand == "+" , alternative + 1 , alternative)) %>%
mutate (deltaPSI = all_mut_PSI - all_wt_PSI) %>%
mutate (sig = ifelse (padj < 0.1 , TRUE , FALSE )) %>%
mutate (distAltEnd = ifelse (strand == "+" , (alternative - end)*- 1 , (start - alternative)*- 1 ))
table3As$ id = 1 : nrow (table3As)
# make granges
rngEnd = GRanges (
seqnames = table3As$ chrom,
ranges = IRanges (start = table3As$ start,
end = table3As$ end),
gene = table3As$ gene,
geneID = table3As$ gene_id,
strand = table3As$ strand,
padj = table3As$ padj,
deltaPSI = table3As$ deltaPSI,
sig = table3As$ sig,
distAltEnd = table3As$ distAltEnd,
id = table3As$ id,
wtPSI = table3As$ all_wt_PSI,
mutPSI = table3As$ all_mut_PSI
)
names (rngEnd) = c (1 : length (rngEnd))
table3AsPlus = subset (table3As, strand == "+" )
table3AsMinus = subset (table3As, strand == "-" )
rngAltPlus = GRanges (
seqnames = table3AsPlus$ chrom,
ranges = IRanges (start = table3AsPlus$ start,
end = table3AsPlus$ alternative),
gene = table3AsPlus$ gene,
geneID = table3AsPlus$ gene_id,
strand = table3AsPlus$ strand,
padj = table3AsPlus$ padj,
deltaPSI = table3AsPlus$ deltaPSI,
sig = table3AsPlus$ sig,
distAltEnd = table3AsPlus$ distAltEnd,
id = table3AsPlus$ id,
wtPSI = table3AsPlus$ all_wt_PSI,
mutPSI = table3AsPlus$ all_mut_PSI
)
rngAltMinus = GRanges (
seqnames = table3AsMinus$ chrom,
ranges = IRanges (start = table3AsMinus$ alternative,
end = table3AsMinus$ end),
gene = table3AsMinus$ gene,
geneID = table3AsMinus$ gene_id,
strand = table3AsMinus$ strand,
padj = table3AsMinus$ padj,
deltaPSI = table3AsMinus$ deltaPSI,
sig = table3AsMinus$ sig,
distAltEnd = table3AsMinus$ distAltEnd,
id = table3AsMinus$ id,
wtPSI = table3AsMinus$ all_wt_PSI,
mutPSI = table3AsMinus$ all_mut_PSI
)
rngAlt = c (rngAltPlus, rngAltMinus)
rngAlt = rngAlt[order (rngAlt$ id)]
names (rngAlt) = c (1 : length (rngAlt))
saveRDS (rngEnd, file = "./data/rngEnd.rds" )
saveRDS (rngAlt, file = "./data/rngAlt.rds" )
export (rngEnd, con = "./data/rngEnd.bed" , format = "BED" )
export (rngAlt, con = "./data/rngAlt.bed" , format = "BED" )
```
::: {.panel-tabset}
### 0-200
```{r, fig.width=10, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 10
#| fig-height: 4
#| fig-cap: Significance of alternative 3' splicing events by distance between canonical and alternative splicing pairs.
df = rngEnd %>%
as.data.frame () %>%
mutate (region = ifelse (distAltEnd >= 12 & distAltEnd <= 21 , "Affected" , "Not Affected" ))
ggplot (df, aes (x = distAltEnd, y = padj, group = distAltEnd, fill = region)) +
geom_boxplot (outlier.size = 0.1 , lwd = 0.5 ) +
xlim (0 ,200 ) +
theme_pub () +
scale_fill_npg () +
theme (legend.position = "top" ) +
labs (
x = "Distance between splice sites [nt]" ,
y = "Adjusted P value" ,
fill = "Region type"
)
ggsave ("splicingEvents_200.pdf" , width = 10 , height = 4 , device = "pdf" )
```
### 0-100
```{r, fig.width=10, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 10
#| fig-height: 4
#| fig-cap: Significance of alternative 3' splicing events by distance between canonical and alternative splicing pairs.
df = rngEnd %>%
as.data.frame () %>%
mutate (region = ifelse (distAltEnd >= 12 & distAltEnd <= 21 , "Affected" , "Not Affected" ))
ggplot (df, aes (x = distAltEnd, y = padj, group = distAltEnd, fill = region)) +
geom_boxplot (outlier.size = 0.1 , lwd = 0.5 ) +
xlim (0 ,100 ) +
theme_pub () +
scale_fill_npg () +
theme (legend.position = "top" ) +
labs (
x = "Distance between splice sites [nt]" ,
y = "Adjusted P value" ,
fill = "Region type"
)
ggsave ("splicingEvents_100.pdf" , width = 10 , height = 4 , device = "pdf" )
```
:::
## Zoom-in version
::: {.panel-tabset}
### Boxplot
```{r, fig.width=4, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 4
#| fig-height: 4
ggplot (df, aes (x = distAltEnd, y = padj, group = distAltEnd, fill = region)) +
geom_boxplot (outlier.size = 0.1 , lwd = 0.5 ) +
xlim (0 ,50 ) +
theme_pub () +
scale_fill_npg () +
theme (legend.position = "top" ) +
labs (
x = "Distance between splice sites [nt]" ,
y = "Adjusted P value" ,
fill = "Region type"
)
ggsave ("splicingEvents_50.pdf" , width = 4 , height = 4 , device = "pdf" )
```
### Histogram
```{r, fig.width=4, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 4
#| fig-height: 4
dfN = df %>%
filter (sig == TRUE ) %>%
group_by (distAltEnd) %>%
summarise (n = n ()) %>%
mutate (region = ifelse (distAltEnd >= 12 & distAltEnd <= 21 , "Affected" , "Not Affected" ))
ggplot (dfN, aes (x = distAltEnd, y = n, group = distAltEnd, fill = region)) +
geom_col (width = 0.9 ) +
xlim (0 ,50 ) +
theme_pub () +
scale_fill_npg () +
theme (legend.position = "top" ) +
labs (
x = "Distance between splice sites [nt]" ,
y = "#N [Significant]" ,
fill = "Region type"
)
ggsave ("splicingEvents_hist_50.pdf" , width = 4 , height = 4 , device = "pdf" )
```
:::
# Splicing meta profile maps
```{r}
#| label: load iCLIP signal
#| message: false
#|
# Load clip data SF3B1 WT
clipFilesWt = "/Users/mirko/Projects/sf3b1/01_data_subsamp/wt/cov/replicate"
clipFiles = c (clipFilesWt)
clipFiles = list.files (clipFiles, pattern = ".bw$" , full.names = TRUE )
clipFilesP = clipFiles[grep (clipFiles, pattern = "Plus" )]
clipFilesM = clipFiles[grep (clipFiles, pattern = "Minus" )]
# Organize clip data in dataframe
colData = data.frame (
id = 1 : 3 ,
condition = factor (c ("WT" , "WT" , "WT" )),
clPlus = clipFilesP,
clMinus = clipFilesM)
bdsSF3B1_WT = BSFDataSetFromBigWig (ranges = rngEnd, meta = colData)
# Load clip data SF3B1 MUT
clipFilesMut = "/Users/mirko/Projects/sf3b1/01_data_subsamp/mut/cov/replicate"
clipFiles = c (clipFilesMut)
clipFiles = list.files (clipFiles, pattern = ".bw$" , full.names = TRUE )
clipFilesP = clipFiles[grep (clipFiles, pattern = "Plus" )]
clipFilesM = clipFiles[grep (clipFiles, pattern = "Minus" )]
# Organize clip data in dataframe
colData = data.frame (
id = 1 : 2 ,
condition = factor (c ("MUT" , "MUT" )),
clPlus = clipFilesP,
clMinus = clipFilesM)
bdsSF3B1_MUT = BSFDataSetFromBigWig (ranges = rngEnd, meta = colData)
# Load clip data U2AF65 WT
clipFilesWt = "/Users/mirko/Projects/BindingSiteStrength/01_data/05_U2AF65_cellLines/K562/cov/replicates/"
clipFiles = c (clipFilesWt)
clipFiles = list.files (clipFiles, pattern = ".bw$" , full.names = TRUE )
clipFilesP = clipFiles[grep (clipFiles, pattern = "plus" )]
clipFilesM = clipFiles[grep (clipFiles, pattern = "minus" )]
# Organize clip data in dataframe
colData = data.frame (
id = 1 : 4 ,
condition = factor (rep ("WT" ,4 )),
clPlus = clipFilesP,
clMinus = clipFilesM)
# creating objects and importing clip siganl
bdsU2AF2_WT = BSFDataSetFromBigWig (ranges = rngEnd, meta = colData)
# Load eclip data SF3B1 WT
clipFilesWt = "/Users/mirko/Projects/sf3b1/04_eCLIP/sf3b1/"
clipFiles = c (clipFilesWt)
clipFiles = list.files (clipFiles, pattern = ".bw$" , full.names = TRUE )
clipFilesP = clipFiles[grep (clipFiles, pattern = "plus" )]
clipFilesM = clipFiles[grep (clipFiles, pattern = "minus" )]
# Organize clip data in dataframe
colData = data.frame (
id = 1 : 2 ,
condition = factor (rep ("WT" ,2 )),
clPlus = clipFilesP,
clMinus = clipFilesM)
# creating objects and importing clip siganl
bdsSF3B1_ECLIP = BSFDataSetFromBigWig (ranges = rngEnd, meta = colData)
clipData = list (bdsSF3B1_WT = bdsSF3B1_WT,
bdsSF3B1_MUT = bdsSF3B1_MUT,
bdsU2AF2_WT = bdsU2AF2_WT,
bdsSF3B1_ECLIP = bdsSF3B1_ECLIP)
```
For meta profiles iCLIP crosslinks are aligned at the canonical and alternative splice sites, in a symmetrical 200nt wide window. From all events (#N=`r myFormat(length(rngEnd))` ), only those with a distance from 0-100nt are shown (#N=`r myFormat(length(rngEnd[rngEnd$distAltEnd <= 100]))` ). Note that for each plot all events with zero crosslinks are also removed. The final number of ranges used is indicated above each plot.
```{r, functionHmBasic}
makeHmMatrixList <- function (clipSet, plot.topLim.size = 1 ){
### --------------------------------------------------------------------------
### Ranges
### --------------------------------------------------------------------------
# select ranges
# -> select within 100nt distance
rngEndCurr = rngEnd
rngAltCurr = rngAlt
rngEndCurr = rngEnd[rngEnd$ distAltEnd <= 100 ]
rngAltCurr = rngAlt[rngAlt$ distAltEnd <= 100 ]
# shuffle ranges around
# -> to avoid auto sorting by P value
set.seed (1234 )
reorderIdx = sample (1 : length (rngEndCurr))
rngEndCurr = rngEndCurr[reorderIdx,]
rngAltCurr = rngAltCurr[reorderIdx,]
### --------------------------------------------------------------------------
### Coverage
### --------------------------------------------------------------------------
# coverage 3'AS outer position
bdsSel = setRanges (clipSet, resize (rngEndCurr, fix = "end" , width = 1 ) + 200 )
cov1 = coverageOverRanges (bdsSel, returnOptions = "merge_all_replicates" , method = "mean" )
m1 = cov1
# coverage 3'AS inner position
bdsSel = setRanges (clipSet, resize (rngAltCurr, fix = "end" , width = 1 ) + 200 )
cov2 = coverageOverRanges (bdsSel, returnOptions = "merge_all_replicates" , method = "mean" )
m2 = cov2
# sorting by distance to 3'AS
distIdx = order (rngEndCurr$ distAltEnd, decreasing = FALSE )
m1 = m1[distIdx,]
m2 = m2[distIdx,]
# remove rows with no signal
m1 = m1[rowSums (m1) > 0 ,]
m2 = m2[rowSums (m2) > 0 ,]
# scale values in matrix for plotting
m1 = minMaxNorm (m1)
m2 = minMaxNorm (m2)
m1 = t (apply (m1, 1 , smoothing, lambda= 0.05 , dim= 401 ))
m2 = t (apply (m2, 1 , smoothing, lambda= 0.05 , dim= 401 ))
# combine matrices
keep = intersect (rownames (m1), rownames (m2))
m1 = m1[rownames (m1) %in% keep,]
m2 = m2[rownames (m2) %in% keep,]
### --------------------------------------------------------------------------
### Annotations
### --------------------------------------------------------------------------
# set reference range for annotations
range = rngEndCurr
currM = m1
# annotate with distance between alternative and constitutive 3'ss
dfDist = data.frame (dist = range$ distAltEnd[as.numeric (rownames (currM))])
dfDist$ dist[dfDist$ dist > 100 ] = 100
haDist = rowAnnotation (
dist = anno_points (dfDist, size = unit (2 , "points" ), gp = gpar (col = "#595959" )),
width = unit (1 , "cm" ), annotation_name_gp = gpar (fontsize = 6 )
)
# annotate with dPSI/ PSI values
df = data.frame (deltaPSI = range$ deltaPSI[as.numeric (rownames (currM))] * - 1 )
haDeltaPSI = rowAnnotation (
deltaPSI = anno_points (df, size = unit (2 , "points" ), gp = gpar (col = "#4c435a" , alpha = 1 ), ylim = c (- 1 ,1 ) ),
width = unit (2 , "cm" ), annotation_name_gp = gpar (fontsize = 6 )
)
df = data.frame (psiWT = range$ wtPSI[as.numeric (rownames (currM))] * - 1 )
haWtPSI = rowAnnotation (
psiWT = anno_points (df, size = unit (2 , "points" ), gp = gpar (col = "#0086b3" , alpha = 0.5 )), # , ylim = c(-1,0)
width = unit (2 , "cm" ), annotation_name_gp = gpar (fontsize = 6 )
)
df = data.frame (psiMUT = range$ mutPSI[as.numeric (rownames (currM))] * - 1 )
haMutPSI = rowAnnotation (
psiMUT = anno_points (df, size = unit (2 , "points" ), gp = gpar (col = "#990000" , alpha = 0.5 )), # , ylim = c(-1,0)
width = unit (2 , "cm" ), annotation_name_gp = gpar (fontsize = 6 )
)
# annotate with significant results
sig = range$ sig[as.numeric (rownames (currM))]
haSig = rowAnnotation (sigRes = sig, col = list (sigRes = c ("FALSE" = "white" , "TRUE" = "black" )),
gp = gpar (lwd = 0.1 ),
show_annotation_name= F, annotation_name_gp = gpar (fontsize = 6 ),
annotation_legend_param = list (sigRes = list (nrow = 1 , direction = "horizontal" ))
)
# split by range
split = factor (ifelse (dfDist$ dist >= 12 & dfDist$ dist <= 21 , "2" ,
ifelse (dfDist$ dist <= 12 & dfDist$ dist <= 21 , "1" , "3" )),
levels = c ("1" , "2" , "3" ))
### --------------------------------------------------------------------------
### Plotting
### --------------------------------------------------------------------------
# use raw-data for barplot on top
mm1 = cov1[rownames (cov1) %in% rownames (m1),]
mm2 = cov2[rownames (cov2) %in% rownames (m2),]
mm1 = mm1[,c (30 : 250 )]
mm2 = mm2[,c (100 : 320 )]
# formatting of matrix outline
rownames (m1) = NULL
colnames (m1) = - 200 : 200
colnames (m1)[as.numeric (colnames (m1)) %% 100 != 0 ] = ""
rownames (m2) = NULL
colnames (m2) = - 200 : 200
colnames (m2)[as.numeric (colnames (m2)) %% 100 != 0 ] = ""
# change heatmap plotting frame
m1 = m1[,c (30 : 250 )]
m2 = m2[,c (100 : 320 )]
# annotate with top col means profile
df1 = data.frame (sums = colMeans (mm1))
haMeans1 = HeatmapAnnotation (cov = anno_barplot (df1, gp = gpar (fill = "#595959" , col = "#595959" ), ylim = c (0 , plot.topLim.size)),
height = unit (2 , "cm" ), show_annotation_name= F, annotation_name_gp = gpar (fontsize = 6 ))
df2 = data.frame (sums = colMeans (mm2))
haMeans2 = HeatmapAnnotation (cov = anno_barplot (df2, gp = gpar (fill = "#595959" , col = "#595959" ), ylim = c (0 , plot.topLim.size)),
height = unit (2 , "cm" ), show_annotation_name= F, annotation_name_gp = gpar (fontsize = 6 ))
# color scale
custom.col = colorRamp2 (c (0 ,0.05 ,0.1 ,0.15 ,0.2 ,0.25 ), viridis (6 , option = "mako" , direction = - 1 ))
# plot heatmap
h1 = Heatmap (m1,
column_title = "3'AS outer" , name = "iCLIP signal" ,
cluster_rows = FALSE , cluster_columns = FALSE ,
show_column_names = TRUE , show_row_names = FALSE ,
col = custom.col,
row_split = split, row_gap = unit (0.1 , "cm" ),
column_names_gp = gpar (fontsize = 8 ),
top_annotation = haMeans1,
right_annotation = c (haSig, haDeltaPSI, haWtPSI, haMutPSI),
border = TRUE ,
use_raster = TRUE , raster_by_magick = TRUE ,
raster_magick_filter = "Spline" ,
raster_quality = 1 , raster_resize_mat = mean,
heatmap_legend_param = list (
legend_width = unit (6 , "cm" ),
title_position = "topleft" , direction = "horizontal"
)
)
h2 = Heatmap (m2,
column_title = "3'AS inner" , name = "iCLIP signal" ,
cluster_rows = FALSE , cluster_columns = FALSE ,
show_column_names = TRUE , show_row_names = FALSE ,
col = custom.col,
row_split = split, row_gap = unit (0.1 , "cm" ),
top_annotation = haMeans2,
left_annotation = c (haDist),
column_names_gp = gpar (fontsize = 8 ),
border = TRUE ,
use_raster = TRUE , raster_by_magick = TRUE ,
raster_magick_filter = "Spline" ,
raster_quality = 1 , raster_resize_mat = mean,
heatmap_legend_param = list (
legend_width = unit (6 , "cm" ),
title_position = "topleft" , direction = "horizontal"
)
)
plotList = h2 + h1
matDim = dim (m1)
l = list (plotList = plotList, matDim = matDim)
return (l)
}
```
::: {.panel-tabset}
### SF3B1-WT
```{r, fig.width=7, fig.height=5, fig.cap="\\textbf{Heatmap at 3'AS with SF3B1-WT signal.}"}
#| message: false
#| warning: false
#|
p = makeHmMatrixList (clipSet = clipData$ bdsSF3B1_WT, plot.topLim.size = 1 )
draw (p$ plotList, ht_gap = unit (0.2 , "cm" ), heatmap_legend_side = "bottom" , annotation_legend_side = "bottom" ,
column_title = paste0 ("SF3B1-WT (#N:" , format (p$ matDim[1 ], big.mark = "," ), ")" ))
ggsave ("hm1.pdf" , width = 7 , height = 5 , device = "pdf" )
```
### SF3B1-MUT
```{r, fig.width=7, fig.height=5, fig.cap="\\textbf{Heatmap at 3'AS with SF3B1-MUT signal.}"}
#| message: false
#| warning: false
#|
p = makeHmMatrixList (clipSet = clipData$ bdsSF3B1_MUT, plot.topLim.size = 1 )
draw (p$ plotList, ht_gap = unit (0.2 , "cm" ), heatmap_legend_side = "bottom" , annotation_legend_side = "bottom" ,
column_title = paste0 ("SF3B1-MUT (#N:" , format (p$ matDim[1 ], big.mark = "," ), ")" ))
```
### U2AF2-WT
```{r, fig.width=7, fig.height=5, fig.cap="\\textbf{Heatmap at 3'AS with U2AF2-WT signal.}"}
#| message: false
#| warning: false
#|
p = makeHmMatrixList (clipSet = clipData$ bdsU2AF2_WT, plot.topLim.size = 3 )
draw (p$ plotList, ht_gap = unit (0.2 , "cm" ), heatmap_legend_side = "bottom" , annotation_legend_side = "bottom" ,
column_title = paste0 ("U2AF2-WT (#N:" , format (p$ matDim[1 ], big.mark = "," ), ")" ))
```
### SF3B1-eCLIP
```{r, fig.width=7, fig.height=5, fig.cap="\\textbf{Heatmap at 3'AS with SF3B1-eCLIP signal.}"}
#| message: false
#| warning: false
#|
p = makeHmMatrixList (clipSet = clipData$ bdsSF3B1_ECLIP, plot.topLim.size = 0.05 )
draw (p$ plotList, ht_gap = unit (0.2 , "cm" ), heatmap_legend_side = "bottom" , annotation_legend_side = "bottom" ,
column_title = paste0 ("SF3B1-eCLIP (#N:" , format (p$ matDim[1 ], big.mark = "," ), ")" ))
```
:::
# PSI distance plots
The following plots show at which distances the canonical splice sites is preferred over the alternative and vice versa. For the zoom out version usage fraction per distance is computed as a rolling mean over 20nt. For the zoom in fractions are shown on a per nt level.
::: {.panel-tabset}
### Zoom-out
```{r, fig.width=4, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 4
#| fig-height: 4
#|
df = rngEnd %>% as.data.frame () %>%
mutate (sel = ifelse (wtPSI < 0.5 , "Low" , "High" )) %>%
group_by (distAltEnd, sel) %>%
summarise (n = n (), .groups = "keep" ) %>%
pivot_wider (values_from = n, names_from = sel) %>%
mutate (distAltEnd = as.integer (distAltEnd)) %>%
mutate (High = ifelse (is.na (High), 0 , High)) %>%
mutate (Low = ifelse (is.na (Low), 0 , Low)) %>%
ungroup () %>%
mutate (rs_high = rollapply (data = High, width = 20 , FUN = mean, align = "left" , fill = NA )) %>%
mutate (rs_low = rollapply (data = Low, width = 20 , FUN = mean, align = "left" , fill = NA )) %>%
group_by (distAltEnd) %>%
summarise (High = sum (rs_high), Low = sum (rs_low), Sum = sum (rs_high, rs_low), .groups = "keep" ) %>%
mutate (FracHigh = High/ Sum, FracLow = Low/ Sum) %>%
select (distAltEnd, FracHigh, FracLow) %>%
pivot_longer (- distAltEnd) %>%
mutate (bin = as.numeric (distAltEnd)) %>%
drop_na ()
ggplot () +
geom_line (data = df, aes (x = bin, y = value, color = name, group = name), size = 0.5 ) +
geom_smooth (data = df, aes (x = bin, y = value, color = name, group = name),
method = "loess" , se = FALSE , size = 1 , span = 0.1 ) +
theme_pub () +
scale_color_npg (name = "3' splice site usage" , labels = c ("3'SS (constitutive)" , "*3'SS (alternative)" )) +
labs (
x = "Splice site distance (running mean [nt])" ,
y = "Usage fraction" ,
color = "WT inclusion level"
) +
theme (legend.position = "top" , aspect.ratio = 1 ) +
xlim (0 ,250 )
ggsave ("psiDistance_zoom_out.pdf" , width = 4 , height = 4 , device = "pdf" )
```
### Zoom-in
```{r, fig.width=4, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 4
#| fig-height: 4
df = rngEnd %>% as.data.frame () %>%
mutate (sel = ifelse (wtPSI < 0.5 , "Low" , "High" )) %>%
group_by (distAltEnd, sel) %>%
summarise (n = n ()) %>%
pivot_wider (values_from = n, names_from = sel) %>%
mutate (distAltEnd = as.integer (distAltEnd)) %>%
mutate (High = ifelse (is.na (High), 0 , High)) %>%
mutate (Low = ifelse (is.na (Low), 0 , Low)) %>%
group_by (distAltEnd) %>%
summarise (High = sum (High), Low = sum (Low), Sum = sum (High, Low)) %>%
mutate (FracHigh = High/ Sum, FracLow = Low/ Sum) %>%
select (distAltEnd, FracHigh, FracLow) %>%
pivot_longer (- distAltEnd) %>%
mutate (bin = as.numeric (distAltEnd))
dfN = rngEnd %>% as.data.frame () %>%
select (distAltEnd, wtPSI) %>%
mutate (bin = cut_width (distAltEnd, width = 10 , boundary = 0 )) %>%
mutate (bin = as.numeric (bin)) %>%
select (c (bin, wtPSI))
ggplot () +
ggrastr:: rasterise (geom_quasirandom (data = dfN, aes (x = bin, y = wtPSI), color = "#808080" , size = 0.1 , alpha = 0.5 ), dpi = 300 ) +
geom_line (data = df, aes (x = bin, y = value, color = name, group = name), size = 0.8 ) +
geom_smooth (data = df, aes (x = bin, y = value, color = name, group = name),
method = "loess" , se = FALSE , size = 1 , span = 0.3 ) +
theme_pub () +
scale_color_npg (name = "3' splice usage" , labels = c ("3'SS (canonical)" , "*3'SS (alternative)" )) +
labs (
x = "Splice site distance [nt]" ,
y = "Usage fraction" ,
color = "WT inclusion level"
) +
theme (legend.position = "top" ) +
scale_y_continuous (
breaks = c (seq (from = - 0.2 , to = 1.2 , by = 0.2 )),
name = "Usage fraction" ,
sec.axis = sec_axis (~ ., name = "WT-PSI distribution" )
) +
xlim (0 ,50 )
ggsave ("psiDistance_zoom_in.pdf" , width = 4 , height = 4 , device = "pdf" )
```
### Zoom-in 2
```{r, fig.width=4, fig.height=4}
#| message: false
#| warning: false
#| fig-width: 4
#| fig-height: 4
ggplot () +
ggrastr:: rasterise (geom_quasirandom (data = dfN, aes (x = bin, y = wtPSI), color = "#808080" , size = 0.1 , alpha = 0.5 ), dpi = 300 ) +
geom_line (data = df, aes (x = bin, y = value, color = name, group = name), size = 0.8 ) +
geom_smooth (data = df, aes (x = bin, y = value, color = name, group = name),
method = "loess" , se = FALSE , size = 1 , span = 0.3 ) +
theme_pub () +
scale_color_npg (name = "3' splice usage" , labels = c ("3'SS (canonical)" , "*3'SS (alternative)" )) +
labs (
x = "Splice site distance [nt]" ,
y = "Usage fraction" ,
color = "WT inclusion level"
) +
theme (legend.position = "top" ) +
scale_y_continuous (
breaks = c (seq (from = - 0.2 , to = 1.2 , by = 0.2 )),
name = "Usage fraction" ,
sec.axis = sec_axis (~ ., name = "WT-PSI distribution" )
) +
xlim (0 ,30 )
ggsave ("psiDistance_zoom_in2.pdf" , width = 4 , height = 4 , device = "pdf" )
```
:::
# Session Information
```{r, session_info, include=TRUE, echo=TRUE, results='markup'}
sessionInfo ()
```